In [11]:
# !pip install dfply
from dfply import * 
import networkx as nx
import matplotlib.pyplot as plt
from bokeh.io import show, output_notebook
from bokeh.models import Plot, Range1d, MultiLine, Circle
from bokeh.models.graphs import from_networkx
from bokeh.transform import factor_cmap
from bokeh.transform import linear_cmap
import pandas as pd
from bokeh.palettes import Spectral11, Colorblind
from math import ceil
from math import pow
from bokeh.models import (BoxZoomTool, Circle, HoverTool,PanTool,
                          MultiLine, Plot, Range1d, ResetTool, NodesAndLinkedEdges,TapTool)
import warnings
warnings.filterwarnings("ignore")
In [12]:
transfers_df = pd.read_pickle("./../../Capstone/Data/Clean/Transfers_Network.pkl")
transfers_df = transfers_df.sort_values("date",ascending = True) 
transfers_df = transfers_df.drop_duplicates()
transfers_df = transfers_df.dropna()

Network's preparation functions

In [13]:
def box_cox_normalization(node_size):
    size = 0.5
    l = 0.5
    compressed_point = (pow(node_size, l) - 1) / l 
    return ceil(size*compressed_point)


def z_score(input_v, avg , sd):
  return (input_v - avg) / sd


def edge_color(t_type):
  summer, winter, mid = "orange", "black", "red"
  if(t_type == "summer"):
    return summer
  elif(t_type == "winter"):
    return winter
  else:
    return mid

def transfers_detailed(transfers_df):
  teams_spent = transfers_df.groupby(['to','season']).apply(lambda x: x.fee.sum())
  teams_spent = teams_spent.reset_index(inplace = False, drop = False)
  teams_spent.rename({0:"spent"}, axis = "columns", inplace = True)

  teams_received = transfers_df.groupby(['from','season']).apply(lambda x: x.fee.sum())
  teams_received = teams_received.reset_index(inplace = False, drop = False)
  teams_received.rename({0:"received"}, axis = "columns", inplace = True)

  leagues_spent = transfers_df.groupby(['to_league','season']).apply(lambda x: x.fee.sum())
  leagues_spent = leagues_spent.reset_index(inplace = False, drop = False)
  leagues_spent.rename({0:"spent_league"}, axis = "columns", inplace = True)

  leagues_received = transfers_df.groupby(['from_league','season']).apply(lambda x: x.fee.sum())
  leagues_received = leagues_received.reset_index(inplace = False, drop = False)
  leagues_received.rename({0:"received_league"}, axis = "columns", inplace = True)

  transfers_df = pd.merge(transfers_df, teams_spent, on = ["to","season"])
  transfers_df = pd.merge(transfers_df, teams_received, on = ["from","season"])
  transfers_df = pd.merge(transfers_df, leagues_spent, on = ["to_league","season"])
  transfers_df = pd.merge(transfers_df, leagues_received, on = ["from_league","season"])

  return transfers_df

def nodes_attributes(transfers_df):
  from_info = transfers_df[["from","from_country","from_cont","from_league","from_league_class"]].drop_duplicates("from")
  from_info.rename(lambda x: x.replace("from_",""), axis = "columns", inplace = True)
  from_info.rename({"from":"club"}, axis = "columns", inplace = True)

  to_info = transfers_df[["to","to_country","to_cont","to_league","to_league_class"]].drop_duplicates("to")
  to_info.rename(lambda x: x.replace("to_",""), axis = "columns", inplace = True)
  to_info.rename({"to":"club"}, axis = "columns", inplace = True)

  nodes_attr = pd.merge(from_info, to_info, on = ["club","country","cont","league","league_class"]).drop_duplicates()

  from_received = (transfers_df >> 
  group_by(X["from"]) >> 
  summarize( received = X.received.sum()
            ))
  from_received.rename({"from":"club"}, axis = "columns", inplace = True)
  to_spent = (transfers_df >> 
  group_by(X["to"]) >> 
  summarize( spent = X.spent.sum()
            ))
  to_spent.rename({"to":"club"}, axis = "columns", inplace = True)
  spent_received = pd.merge(from_received, to_spent, on = ["club"]).drop_duplicates()  
  nodes_attr = pd.merge(nodes_attr, spent_received, on = ["club"])
  nodes_attr['profit'] = nodes_attr['received'] - nodes_attr['spent']

  return nodes_attr

def set_nodes_attributes(G, nodes_attr):
  nodes_cont = dict(zip(nodes_attr.club, nodes_attr.cont))
  nodes_country = dict(zip(nodes_attr.club, nodes_attr.country))
  nodes_league = dict(zip(nodes_attr.club, nodes_attr.league))
  nodes_league_class = dict(zip(nodes_attr.club, nodes_attr.league_class))
  nodes_received = dict(zip(nodes_attr.club, nodes_attr.received))
  nodes_spent = dict(zip(nodes_attr.club, nodes_attr.spent))
  nodes_profit = dict(zip(nodes_attr.club, nodes_attr.profit))
  names = dict(map(lambda node: (node[0], (node[0])), dict(G.degree).items()))

  nx.set_node_attributes(G, nodes_cont, 'continent')
  nx.set_node_attributes(G, nodes_country, 'country')
  nx.set_node_attributes(G, nodes_league, 'league')
  nx.set_node_attributes(G, nodes_received, 'received')
  nx.set_node_attributes(G, nodes_spent, 'spent')
  nx.set_node_attributes(G, nodes_profit, 'profit')
  nx.set_node_attributes(G, nodes_league_class, 'league_class')
  nx.set_node_attributes(G, names, 'name')

  new_sizes = dict(map(lambda node: (node[0], box_cox_normalization(node[1])), dict(G.degree).items()))
  degrees = dict(map(lambda node: (node[0], (node[1])), dict(G.degree).items()))
  fees_dict = dict(nx.get_edge_attributes(G,"fee")).items() 
  ages_dict = dict(nx.get_edge_attributes(G,"age")).items()
  types_dict = dict(nx.get_edge_attributes(G,"type")).items()

  fees = np.array(list(dict(nx.get_edge_attributes(G,"fee")).values()))
  ages = np.array(list(dict(nx.get_edge_attributes(G,"age")).values()))

  avg_fee = np.mean(fees)
  sd_fee = np.std(fees)
  
  new_fees = dict(map(lambda edge: ( edge[0], z_score(edge[1], avg_fee, sd_fee) ), fees_dict ))
  new_ages = dict(map(lambda edge: ( edge[0], z_score(edge[1], avg_fee, sd_fee) ), fees_dict ))
  edge_colors = dict(map(lambda edge: ( edge[0], edge_color(edge[1]) ), types_dict ))

  nx.set_node_attributes(G, dict(G.degree), 'connections')
  nx.set_node_attributes(G, new_sizes, 'node_size')
  nx.set_node_attributes(G, degrees, 'degree')
  nx.set_edge_attributes(G, new_fees, 'edge_width')
  nx.set_edge_attributes(G, new_ages, 'edge_alpha')
  nx.set_edge_attributes(G, edge_colors, 'edge_color')
  nx.set_node_attributes(G,dict(G.out_degree()),"outgoing_edges")
  nx.set_node_attributes(G,dict(G.in_degree()),"incoming_edges")
  return G
In [14]:
def group_league(transfers_df):
  by_leagues = (transfers_df >> 
  group_by(X.from_league, X.to_league, X["type"]) >>
  summarize(fee = X.fee.sum(), count = X.index.nunique(),
            age = X.age.mean(),
            from_country = X.from_country.unique()[0],
            from_league_class = X.from_league_class.unique()[0],
            from_cont = X.from_cont.unique()[0],
            from_league_received = X.received_league.sum(),
            to_country = X.to_country.unique()[0],
            to_league_class = X.to_league_class.unique()[0],
            to_cont = X.to_cont.unique()[0],
            to_league_spent = X.spent_league.sum(),                   
            ))
  return by_leagues

def league_node_attrs(by_leagues):
  from_league_info = (by_leagues >> 
                    group_by(X.from_league) >> 
                    summarize( 
                        country = X.from_country.unique()[0],
                        cont = X.from_cont.unique()[0],
                        league_class = X.from_league_class.unique()[0],
                        received = X.from_league_received.unique()[0],
                     ))
  from_league_info.rename({"from_league":"league"},axis = "columns", inplace = True)
  to_league_info = (by_leagues >> 
                      group_by(X.to_league) >> 
                      summarize( 
                          country = X.to_country.unique()[0],
                          cont = X.to_cont.unique()[0],
                          league_class = X.to_league_class.unique()[0],
                          spent = X.to_league_spent.unique()[0],
                      ))
  to_league_info.rename({"to_league":"league"},axis = "columns", inplace = True)
  node_attrs = pd.merge(from_league_info,to_league_info, on = ["league","league_class","country","cont"])
  node_attrs["profit"] = node_attrs["received"] - node_attrs["spent"]
  return node_attrs

def league_attributes(G, node_attrs):
  node_names = dict(map(lambda node: (node[0], (node[0])), dict(G.degree).items()))
  node_country = dict(zip(node_attrs.league, node_attrs.country))
  node_cont = dict(zip(node_attrs.league, node_attrs.cont))
  node_league_class = dict(zip(node_attrs.league, node_attrs.league_class))
  node_received = dict(zip(node_attrs.league, node_attrs.received))
  node_spent = dict(zip(node_attrs.league, node_attrs.spent))
  node_profit = dict(zip(node_attrs.league, node_attrs.profit))
  new_sizes = dict(map(lambda node: (node[0], box_cox_normalization(node[1])), dict(G.degree).items()))
  degrees = dict(map(lambda node: (node[0], node[1]), dict(G.degree).items()))

  nx.set_node_attributes(G, node_names, "name")
  nx.set_node_attributes(G, node_country, "country")
  nx.set_node_attributes(G, node_cont, "continent")
  nx.set_node_attributes(G, node_league_class, "league_class")
  nx.set_node_attributes(G, node_received, "received")
  nx.set_node_attributes(G, node_spent, "spent")
  nx.set_node_attributes(G, node_profit, "profit")
  nx.set_node_attributes(G, degrees,'degree')
  nx.set_node_attributes(G,dict(G.out_degree()),"outgoing_edges")
  nx.set_node_attributes(G,dict(G.in_degree()),"incoming_edges")

  types_dict = dict(nx.get_edge_attributes(G,"type")).items()
  edge_colors = dict(map(lambda edge: ( edge[0], edge_color(edge[1]) ), types_dict))
  nx.set_edge_attributes(G, edge_colors, 'edge_color')

  fees = np.array(list(dict(nx.get_edge_attributes(G,"fee")).values()))
  avg_fee = np.mean(fees)
  sd_fee = np.std(fees)
  fees_dict = dict(nx.get_edge_attributes(G,"fee")).items() 
  ages_dict = dict(nx.get_edge_attributes(G,"age")).items()
  nx.set_node_attributes(G, new_sizes, 'node_size')
  new_fees = dict(map(lambda edge: ( edge[0], z_score(edge[1], avg_fee, sd_fee) ), fees_dict ))
  new_ages = dict(map(lambda edge: ( edge[0], z_score(edge[1], avg_fee, sd_fee) ), fees_dict ))
  nx.set_edge_attributes(G, new_fees, 'edge_width')
  nx.set_edge_attributes(G, new_ages, 'edge_alpha')
  nx.set_node_attributes(G, dict(G.degree), 'connections')

  return G
In [15]:
def get_subgraph(G, attr, value):
  filtered = [x for x,y in transfers_G.nodes(data=True) if y[attr] in (value)]
  subgraph = transfers_G.subgraph(filtered)
  return subgraph


def plot_net(G,title,**kwargs):
  node_color = kwargs["node_color"]
  colors = list(set(nx.get_node_attributes(G,node_color).values()))
  node_size = kwargs["node_size"]
  hover_data = []
  edge_alpha = kwargs["edge_alpha"]
  edge_width = kwargs["edge_width"]
  edge_color = kwargs["edge_color"]
  
  for attr in kwargs["hover_data"]:
    name = "@"+attr
    hover_data.append((attr, name))

  plot = Plot(plot_width=700, plot_height=500,
              x_range = Range1d(-1.1, 1.1), y_range=Range1d(-1.1, 1.1))
  plot.title.text = title

  node_hover_tool = HoverTool(tooltips = hover_data)
  plot.add_tools(node_hover_tool, BoxZoomTool(), ResetTool(), PanTool(),TapTool())
  layout = kwargs["layout"]

  graph_renderer = from_networkx(G, layout, scale=1, center=(0, 0))
  graph_renderer.node_renderer.glyph = Circle(size = node_size, fill_color = factor_cmap(node_color,Colorblind[7], colors))

  graph_renderer.edge_renderer.glyph = MultiLine(
      line_alpha = edge_alpha, 
      line_width = edge_width,
      line_color = edge_color
  )
  graph_renderer.edge_renderer.selection_glyph = MultiLine(line_color='blue', line_width=edge_width)
  graph_renderer.selection_policy = NodesAndLinkedEdges()

  plot.renderers.append(graph_renderer)
  output_notebook()
  show(plot)

def call_plot(G,title,layout,node_size = "node_size", node_color = "continent", edge_color = "edge_color",
          edge_alpha = "edge_alpha", edge_width = "edge_width"):
  # 
  hover_data = ["connections","country","continent","league_class","name","spent","received","profit","incoming_edges","outgoing_edges"]
  plot_net(G,title,node_size = node_size, node_color = node_color, edge_color = edge_color,
          edge_alpha = edge_alpha, edge_width = edge_width, hover_data = hover_data, layout = layout)
In [27]:
def network_stats(G):
  print("Network's density")
  print(nx.density(G))

  print("Network's reciprocity")
  print(nx.reciprocity(G))

  print("Network's assortavity based on continent")
  print(nx.attribute_assortativity_coefficient(G,'continent'))

  print("Network's assortavity based on league_class")
  print(nx.attribute_assortativity_coefficient(G,'league_class'))

  print("Network's assortavity based on country")
  print(nx.attribute_assortativity_coefficient(G,'country'))

  print("Network's assortavity based on degrees")
  print(nx.degree_assortativity_coefficient(G))


def network_info(G):
  out_df = pd.DataFrame.from_dict(dict(list(G.out_degree)), orient = "index").reset_index()
  out_df.columns = ['club','outgoing_count']
  print("outgoing edges")
  print(out_df.sort_values("outgoing_count",ascending = False))

  in_df = pd.DataFrame.from_dict(dict(list(G.in_degree)), orient = "index").reset_index()
  in_df.columns = ['club','incoming_count']
  print("incoming edges")
  print(in_df.sort_values("incoming_count",ascending = False))

  degree_df = pd.DataFrame.from_dict(nx.degree_centrality(G),orient = "index").reset_index()
  degree_df.columns = ['club','centrality']
  print("degree centrality")
  print(degree_df.sort_values('centrality',ascending = False))

  in_degree_df = pd.DataFrame.from_dict(nx.in_degree_centrality(G),orient = "index").reset_index()
  in_degree_df.columns = ['club','in_centrality']
  print("incoming degree centrality")
  print(in_degree_df.sort_values('in_centrality',ascending = False))

  out_degree_df = pd.DataFrame.from_dict(nx.out_degree_centrality(G),orient = "index").reset_index()
  out_degree_df.columns = ['club','out_centrality']
  print("outgoing degree centrality")
  print(out_degree_df.sort_values('out_centrality',ascending = False))

  profit_df = pd.DataFrame.from_dict(nx.get_node_attributes(G, "profit"), orient = "index").reset_index()
  profit_df.columns = ['club','profit']
  print("Profit made by teams")
  print(profit_df.sort_values('profit',ascending = False))
In [17]:
edges_cols = ['loan','type','fee','mv','name','continent','nationality','main_field_position',
                    'field_position','age','season','date']
transfers_df = transfers_detailed(transfers_df)
nodes_attr = nodes_attributes(transfers_df)
clubs = nodes_attr.club.unique()
transfers_df = transfers_df[transfers_df["from"].isin(clubs) & transfers_df["to"].isin(clubs)]
transfers_G = nx.from_pandas_edgelist(transfers_df, 'from','to', edges_cols, create_using=nx.MultiDiGraph)
transfers_G = set_nodes_attributes(transfers_G, nodes_attr)
remove = [node for node,degree in dict(transfers_G.degree()).items() if degree < 18]
transfers_G.remove_nodes_from(remove)

Soccer's Transfers Network by teams

In [18]:
call_plot(transfers_G, "Soccer Transfers Network", nx.spring_layout)
Loading BokehJS ...

As the networks contains a lot of nodes, we cannot get much information from the visualization. However we can see that the european teams dominate in the market, with majority of them connected with each other, and also we can see that after europen teams, and majority of the transfers happen during the summer transfer window, except for some Asian teams, whick make most of their deals during the winter transfer window.

In [19]:
from networkx.algorithms import community
communities_generator = community.girvan_newman(transfers_G)
In [20]:
next_level_communities = next(communities_generator)
next_1_level_communities = next(communities_generator)
next_2_level_communities = next(communities_generator)
next_3_level_communities = next(communities_generator)
In [21]:
next_4_level_communities = next(communities_generator)
next_5_level_communities = next(communities_generator)
In [22]:
len(next_5_level_communities)
Out[22]:
7
In [23]:
i = 1
for community in next_5_level_communities:
  for node in community:
    transfers_G.nodes()[node]["community"] = str("community") + str(i)
  i = i + 1
In [24]:
call_plot(transfers_G, "Soccer Transfers Network Communities", nx.spring_layout, node_color = "community")
Loading BokehJS ...

Network Statistics

In [28]:
network_stats(transfers_G)
Network's density
0.08127920560747663
Network's reciprocity
0.6419930530602468
Network's assortavity based on continent
0.6276592157476384
Network's assortavity based on league_class
0.3793460193657123
Network's assortavity based on country
0.5635132471197509
Network's assortavity based on degrees
0.2706955281730688

As we can see the networks density is very low, which is logical as we have many teams, and not all of them have connections between each other. However the reciprocity of the network is relatively high, as most of the teams that make deals with each other have transfers in opposite directions too. The main attributes for the assortiativity of the teams is their continent and country, as it is easier for player's to move to another team that is in the same continent where they play, and even more when it happens in the same country. The metric is around 0.5, as most of the talented players from other continents and non EU countries tend to move to european soccer clubs, as there they have higher chances of succeeding. League's class has the lowest effect on the assortiativity as most of the time players from leagues with lower ranking tend to move to higher ranked leagues. Degree of the node also has relatively low connection to the assortativity of the nodes, as teams with a low number of connections not always are connected to teams with a lot of connections.

In [29]:
network_info(transfers_G)
outgoing edges
                club  outgoing_count
35           Chelsea             131
54          FC Porto             107
47          Juventus              92
21           Benfica              92
65   Atlético Madrid              92
..               ...             ...
196           Kansas               4
301          Göztepe               4
299        HN Jianye               3
312        LA Galaxy               3
156     Sligo Rovers               2

[321 rows x 2 columns]
incoming edges
             club  incoming_count
35        Chelsea             135
103         Genoa             107
54       FC Porto             100
21        Benfica             100
46     Fiorentina              99
..            ...             ...
59      Gimnàstic               5
247     Haugesund               4
243  Cerezo Osaka               4
260    Norrköping               4
156  Sligo Rovers               2

[321 rows x 2 columns]
degree centrality
                club  centrality
35           Chelsea    0.831250
54          FC Porto    0.646875
103            Genoa    0.606250
21           Benfica    0.600000
46        Fiorentina    0.596875
..               ...         ...
299        HN Jianye    0.034375
196           Kansas    0.034375
317  BJ Sinobo Guoan    0.031250
247        Haugesund    0.028125
156     Sligo Rovers    0.012500

[321 rows x 2 columns]
incoming degree centrality
             club  in_centrality
35        Chelsea       0.421875
103         Genoa       0.334375
54       FC Porto       0.312500
21        Benfica       0.312500
46     Fiorentina       0.309375
..            ...            ...
59      Gimnàstic       0.015625
247     Haugesund       0.012500
243  Cerezo Osaka       0.012500
260    Norrköping       0.012500
156  Sligo Rovers       0.006250

[321 rows x 2 columns]
outgoing degree centrality
                club  out_centrality
35           Chelsea        0.409375
54          FC Porto        0.334375
47          Juventus        0.287500
21           Benfica        0.287500
65   Atlético Madrid        0.287500
..               ...             ...
196           Kansas        0.012500
301          Göztepe        0.012500
299        HN Jianye        0.009375
312        LA Galaxy        0.009375
156     Sligo Rovers        0.006250

[321 rows x 2 columns]
Profit made by teams
             club        profit
21        Benfica  7.482310e+09
54       FC Porto  4.798530e+09
24           Ajax  4.562820e+09
89    RB Salzburg  3.646170e+09
103         Genoa  2.718610e+09
..            ...           ...
20    Real Madrid -5.527050e+09
62   FC Barcelona -6.223430e+09
35        Chelsea -7.302390e+09
9         Man Utd -8.154325e+09
5        Man City -1.592600e+10

[321 rows x 2 columns]

We can see that the most central team of the network is Chelsea, and top 5 contains other Italian teams, and the lowest centrality is among not popular teams. Almost the same situation for in degree and out degree centrality. Finally we can see that most that the profitable teams are Benfica, Porto and Ajax, as they are famous in the whole world for rising and selling young talents, whereas the top teams such as Man City and Barcelona are on lowest places in terms of making profits from transfers.

In [30]:
loans_df = transfers_df[transfers_df.loan == True]
# loans_df = transfers_detailed(loans_df)
nodes_attr = nodes_attributes(loans_df)
clubs = nodes_attr.club.unique()
loans_df = loans_df[loans_df["from"].isin(clubs) & loans_df["to"].isin(clubs)]
loans_G = nx.from_pandas_edgelist(loans_df, 'from','to', edges_cols, create_using=nx.MultiDiGraph)
loans_G = set_nodes_attributes(loans_G, nodes_attr)
remove = [node for node,degree in dict(loans_G.degree()).items() if degree < 15]
loans_G.remove_nodes_from(remove)
In [31]:
transfers_df
Out[31]:
date tm_id from to fee mv season loan from_league to_league ... field_position main_field_position age type from_league_class to_league_class spent received spent_league received_league
12 2005-11-01 13796 Rotherham Sunderland 0.0 400000.0 05/06 True League One League One ... CM midfielders 19.0 mid_seson lower lower 150000.0 0.0 650000.0 3000000.0
13 2005-09-01 13796 Sunderland Rotherham 0.0 400000.0 05/06 True League One League One ... CM midfielders 19.0 mid_seson lower lower 0.0 0.0 650000.0 3000000.0
16 2005-08-01 29975 Southampton Bournemouth 0.0 75000.0 05/06 True Premier League Premier League ... CM midfielders 18.0 summer top top 0.0 375000.0 8075000.0 5450000.0
17 2006-05-01 22231 Southampton Liverpool 0.0 100000.0 05/06 True Premier League Premier League ... CM midfielders 21.0 mid_seson top top 0.0 375000.0 8075000.0 5450000.0
18 2006-01-31 25384 Southampton Man City 375000.0 50000.0 05/06 False Premier League Premier League ... CB defenders 19.0 winter top top 375000.0 375000.0 8075000.0 5450000.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
32431 2018-09-26 204381 Stirling Lions Newcastle 0.0 250000.0 18/19 False A-League A-League ... GK goalkeepers 27.0 mid_seson low low 55750000.0 0.0 300000.0 4665000.0
32432 2018-07-09 345343 Wellington P. Willem II 0.0 200000.0 18/19 False A-League Eredivisie ... LB defenders 20.0 summer low top 0.0 0.0 68640000.0 4665000.0
32439 2018-07-01 43117 Melbourne City Heart of Midl. 0.0 750000.0 18/19 False A-League Premiership ... CM midfielders 29.0 summer low low 0.0 890000.0 7542000.0 4665000.0
32440 2018-08-09 399997 Melbourne City Man City 890000.0 1500000.0 18/19 False A-League Premiership ... LW attackers 19.0 summer low low 72290000.0 890000.0 7542000.0 4665000.0
32443 2018-08-18 37372 Ahli Hibernian FC 0.0 850000.0 18/19 False A-League Premiership ... CDM midfielders 33.0 summer low low 200000.0 0.0 7542000.0 4665000.0

10107 rows × 29 columns

Soccer's Loans Network by teams

In [32]:
call_plot(loans_G, "Soccer Loans' Network", nx.spring_layout)
Loading BokehJS ...

Network Statistics

The situation is almost the same situation in terms of main teams of the network. However we can see that, more young players are involved in loans taking place during the winter transfer window(black color for window, low opacity for young players). Also we can see two teams from South Korea that are isolated

In [33]:
network_stats(loans_G)
Network's density
0.12051841085271318
Network's reciprocity
0.9517587939698492
Network's assortavity based on continent
0.760472154720812
Network's assortavity based on league_class
0.4174418132669892
Network's assortavity based on country
0.593723122850382
Network's assortavity based on degrees
0.1505344934169114

Almost the same metrics as for transfers network, except almost maximal value for reciprocity, which is logical as in most of the cases player who is loaned to another club comes back to his club, and only in some cases the club that loaned the player buys him.

In [34]:
by_leagues = group_league(transfers_df)
node_attrs = league_node_attrs(by_leagues)
leagues_G = nx.from_pandas_edgelist(by_leagues, 'from_league','to_league', ['fee','count','age','type'], create_using=nx.MultiDiGraph)
leagues_G = league_attributes(leagues_G, node_attrs)

Soccer's Transfers Network by leagues

In [35]:
call_plot(leagues_G, "Soccer Transfers' Network by Leagues", nx.spring_layout, node_size = "node_size")
Loading BokehJS ...

If we take the leagues as nodes, we can see that teams form the South Korean league have the lowest connection to the other leagues' teams. In general the graph is very interconnected and almost all of the leagues have direct links to each other.

Network Statistics

In [ ]:
network_stats(leagues_G)

As we can see the density of the network is very high, and as it is a multigraph, where two nodes can have more than one edge between them. As we have taken the league as a node, the reciprocity is very high compared to the graph, where the nodes were the teams. In terms of assortiativity the highest effect has the continent of the leagues, and degree of the node has the lowest effect, which is even negative.

In [ ]:
network_info(leagues_G)

The main member of the network is Netherland's league in terms of outgoing transfers. Championship premier league and mls have the highest number of incoming edges. Eredivisie has the highest centrality measures and Championship has the highest in degree centrality measure. Bundesliga makes the most profit in transfers, while Premier League teams spend much more than they receive in transfers.

In [37]:
leagues_loans = transfers_df[transfers_df.loan]
by_leagues = group_league(leagues_loans)
node_attrs = league_node_attrs(by_leagues)
leagues_loans_G = nx.from_pandas_edgelist(by_leagues, 'from_league','to_league', ['fee','count','age','type'], create_using=nx.MultiDiGraph)
leagues_loans_G = league_attributes(leagues_loans_G, node_attrs)

Soccer's Loans Network by leagues

In [38]:
call_plot(leagues_loans_G, "Soccer's Loans Network by leagues", nx.spring_layout, node_size = 'node_size')
Loading BokehJS ...

The loans network is very similar to the transfers network but is less dense.

Network Statistics

In [39]:
network_stats(leagues_loans_G)
Network's density
0.8770161290322581
Network's reciprocity
0.5310344827586206
Network's assortavity based on continent
0.22545168892380194
Network's assortavity based on league_class
0.11977030352748158
Network's assortavity based on country
0.1253083604332791
Network's assortavity based on degrees
0.08173399198896894

The main difference in the metrics except lower density and reciprocity, is that the assortiativity degree based on country is higher, as most of the teams loan out their players to lower leagues in their country, so that they can gain gaming practice.

In [40]:
network_info(leagues_loans_G)
outgoing edges
                   club  outgoing_count
9        Premier League              58
4            Bundesliga              53
20              Serie A              52
13             Liga NOS              48
5          Championship              42
28            Superliga              40
12               LaLiga              39
6            Eredivisie              38
7               Ligue 1              37
24              Série A              35
18   Jupiler Pro League              35
11         Super League              32
19              LaLiga2              29
14          Superligaen              26
17          Eliteserien              26
8                   MLS              25
23           League One              25
0         2. Bundesliga              24
25            Süper Lig              23
16              Serie B              22
30              Ligue 2              21
15            J1 League              17
10          Premiership              17
27     Liga MX Clausura              16
31       Liga DIMAYOR I              16
1               3. Liga              13
26           League Two              12
3           Allsvenskan              11
21          Ekstraklasa              10
2              A-League              10
29  Professional League              10
22           K League 1               8
incoming edges
                   club  incoming_count
19              LaLiga2              45
9        Premier League              45
13             Liga NOS              43
12               LaLiga              42
5          Championship              42
6            Eredivisie              41
7               Ligue 1              40
20              Serie A              35
18   Jupiler Pro League              34
4            Bundesliga              33
25            Süper Lig              33
11         Super League              32
10          Premiership              31
24              Série A              30
8                   MLS              29
28            Superliga              29
16              Serie B              29
3           Allsvenskan              27
27     Liga MX Clausura              25
21          Ekstraklasa              20
23           League One              19
0         2. Bundesliga              19
14          Superligaen              19
29  Professional League              18
1               3. Liga              17
17          Eliteserien              16
15            J1 League              16
2              A-League              15
26           League Two              13
31       Liga DIMAYOR I              13
22           K League 1              12
30              Ligue 2               8
degree centrality
                   club  centrality
9        Premier League    3.322581
13             Liga NOS    2.935484
20              Serie A    2.806452
4            Bundesliga    2.774194
5          Championship    2.709677
12               LaLiga    2.612903
6            Eredivisie    2.548387
7               Ligue 1    2.483871
19              LaLiga2    2.387097
18   Jupiler Pro League    2.225806
28            Superliga    2.225806
24              Série A    2.096774
11         Super League    2.064516
25            Süper Lig    1.806452
8                   MLS    1.741935
16              Serie B    1.645161
10          Premiership    1.548387
14          Superligaen    1.451613
23           League One    1.419355
0         2. Bundesliga    1.387097
17          Eliteserien    1.354839
27     Liga MX Clausura    1.322581
3           Allsvenskan    1.225806
15            J1 League    1.064516
21          Ekstraklasa    0.967742
1               3. Liga    0.967742
30              Ligue 2    0.935484
31       Liga DIMAYOR I    0.935484
29  Professional League    0.903226
26           League Two    0.806452
2              A-League    0.806452
22           K League 1    0.645161
incoming degree centrality
                   club  in_centrality
19              LaLiga2       1.451613
9        Premier League       1.451613
13             Liga NOS       1.387097
12               LaLiga       1.354839
5          Championship       1.354839
6            Eredivisie       1.322581
7               Ligue 1       1.290323
20              Serie A       1.129032
18   Jupiler Pro League       1.096774
4            Bundesliga       1.064516
25            Süper Lig       1.064516
11         Super League       1.032258
10          Premiership       1.000000
24              Série A       0.967742
8                   MLS       0.935484
28            Superliga       0.935484
16              Serie B       0.935484
3           Allsvenskan       0.870968
27     Liga MX Clausura       0.806452
21          Ekstraklasa       0.645161
23           League One       0.612903
0         2. Bundesliga       0.612903
14          Superligaen       0.612903
29  Professional League       0.580645
1               3. Liga       0.548387
17          Eliteserien       0.516129
15            J1 League       0.516129
2              A-League       0.483871
26           League Two       0.419355
31       Liga DIMAYOR I       0.419355
22           K League 1       0.387097
30              Ligue 2       0.258065
outgoing degree centrality
                   club  out_centrality
9        Premier League        1.870968
4            Bundesliga        1.709677
20              Serie A        1.677419
13             Liga NOS        1.548387
5          Championship        1.354839
28            Superliga        1.290323
12               LaLiga        1.258065
6            Eredivisie        1.225806
7               Ligue 1        1.193548
24              Série A        1.129032
18   Jupiler Pro League        1.129032
11         Super League        1.032258
19              LaLiga2        0.935484
14          Superligaen        0.838710
17          Eliteserien        0.838710
8                   MLS        0.806452
23           League One        0.806452
0         2. Bundesliga        0.774194
25            Süper Lig        0.741935
16              Serie B        0.709677
30              Ligue 2        0.677419
15            J1 League        0.548387
10          Premiership        0.548387
27     Liga MX Clausura        0.516129
31       Liga DIMAYOR I        0.516129
1               3. Liga        0.419355
26           League Two        0.387097
3           Allsvenskan        0.354839
21          Ekstraklasa        0.322581
2              A-League        0.322581
29  Professional League        0.322581
22           K League 1        0.258065
Profit made by teams
                   club        profit
4            Bundesliga  3.675262e+10
12               LaLiga  1.996475e+09
9        Premier League  1.423820e+09
6            Eredivisie  8.511900e+08
20              Serie A  4.785060e+08
24              Série A  4.498350e+08
18   Jupiler Pro League  4.113250e+08
13             Liga NOS  4.008780e+08
5          Championship  2.444560e+08
19              LaLiga2  2.202850e+08
0         2. Bundesliga  1.905220e+08
3           Allsvenskan  1.523350e+08
21          Ekstraklasa  1.146220e+08
10          Premiership  1.084750e+08
28            Superliga  8.403300e+07
14          Superligaen  7.115800e+07
30              Ligue 2  7.010000e+07
1               3. Liga  5.088500e+07
17          Eliteserien  3.114000e+07
26           League Two  2.901800e+07
16              Serie B  2.713000e+07
15            J1 League  1.328000e+07
31       Liga DIMAYOR I  1.253600e+07
22           K League 1  6.582000e+06
2              A-League  5.765000e+06
7               Ligue 1  5.620000e+06
29  Professional League -5.350000e+06
8                   MLS -3.073900e+07
23           League One -3.610500e+07
27     Liga MX Clausura -7.931600e+07
25            Süper Lig -2.166330e+08
11         Super League -2.928820e+08

Premier league has the highest centrality measure, while Bundesliga and Championship have the highest out degree and in degree centralities accordingly. Championship made the highest profit in loans and Premier League the lowest.

In [41]:
winter_t = transfers_df[transfers_df["type"] == "winter"]
by_leagues_s = group_league(winter_t)
node_attrs = league_node_attrs(by_leagues_s)
winter_leagues_G = nx.from_pandas_edgelist(by_leagues_s, 'from_league','to_league', ['fee','count','age','type'], create_using=nx.MultiDiGraph)
winter_leagues_G = league_attributes(winter_leagues_G, node_attrs)

Soccer's Winter Transfers Network by teams

In [42]:
call_plot(winter_leagues_G, "Winter Transfers Network by leagues", nx.spring_layout, node_size = "incoming_edges")
Loading BokehJS ...

We can see that the Asian clubs are most active memebers of the winter transfer window being connected to almost all of the other leagues. Chinese league alongside with MlS and Spanish league are the most active member of the transfer market's winter window in terms of buying players.

In [43]:
network_stats(winter_leagues_G)
Network's density
0.4405241935483871
Network's reciprocity
0.6041189931350115
Network's assortavity based on continent
0.18886733821351845
Network's assortavity based on league_class
0.06642737944527564
Network's assortavity based on country
0.07606379436638476
Network's assortavity based on degrees
0.07736973338984807

The network is not very dense in comparison with transfers during all windows, the main attribute for assortiativity degree is continent, and degree of the node has negative effect on it's assortiativity.

Network Statistics

In [44]:
network_info(winter_leagues_G)
outgoing edges
                   club  outgoing_count
15         Super League              22
4            Bundesliga              22
7            Eredivisie              22
12       Premier League              22
5          Championship              19
18             Liga NOS              19
14              Serie A              19
9               Ligue 1              18
0         2. Bundesliga              17
27              Série A              17
26               LaLiga              17
19          Superligaen              17
30            Superliga              16
8    Jupiler Pro League              16
17          Eliteserien              16
16            Süper Lig              15
11                  MLS              14
10              Ligue 2              14
20            J1 League              14
3           Allsvenskan              13
23              LaLiga2              12
28           League One              10
6           Ekstraklasa               9
1               3. Liga               8
31       Liga DIMAYOR I               8
13          Premiership               7
21  Professional League               7
22              Serie B               7
2              A-League               6
24     Liga MX Clausura               5
29           League Two               5
25           K League 1               4
incoming edges
                   club  incoming_count
9               Ligue 1              22
15         Super League              22
11                  MLS              22
16            Süper Lig              21
23              LaLiga2              20
12       Premier League              20
14              Serie A              18
7            Eredivisie              18
26               LaLiga              18
8    Jupiler Pro League              17
27              Série A              17
18             Liga NOS              16
5          Championship              16
4            Bundesliga              15
30            Superliga              14
22              Serie B              14
6           Ekstraklasa              13
0         2. Bundesliga              12
3           Allsvenskan              12
19          Superligaen              11
21  Professional League              11
24     Liga MX Clausura              11
17          Eliteserien              10
20            J1 League              10
13          Premiership              10
1               3. Liga               8
25           K League 1               8
2              A-League               8
31       Liga DIMAYOR I               7
28           League One               6
10              Ligue 2               5
29           League Two               5
degree centrality
                   club  centrality
15         Super League    1.419355
12       Premier League    1.354839
7            Eredivisie    1.290323
9               Ligue 1    1.290323
4            Bundesliga    1.193548
14              Serie A    1.193548
16            Süper Lig    1.161290
11                  MLS    1.161290
5          Championship    1.129032
26               LaLiga    1.129032
18             Liga NOS    1.129032
27              Série A    1.096774
8    Jupiler Pro League    1.064516
23              LaLiga2    1.032258
30            Superliga    0.967742
0         2. Bundesliga    0.935484
19          Superligaen    0.903226
17          Eliteserien    0.838710
3           Allsvenskan    0.806452
20            J1 League    0.774194
6           Ekstraklasa    0.709677
22              Serie B    0.677419
10              Ligue 2    0.612903
21  Professional League    0.580645
13          Premiership    0.548387
1               3. Liga    0.516129
24     Liga MX Clausura    0.516129
28           League One    0.516129
31       Liga DIMAYOR I    0.483871
2              A-League    0.451613
25           K League 1    0.387097
29           League Two    0.322581
incoming degree centrality
                   club  in_centrality
9               Ligue 1       0.709677
15         Super League       0.709677
11                  MLS       0.709677
16            Süper Lig       0.677419
23              LaLiga2       0.645161
12       Premier League       0.645161
14              Serie A       0.580645
7            Eredivisie       0.580645
26               LaLiga       0.580645
8    Jupiler Pro League       0.548387
27              Série A       0.548387
18             Liga NOS       0.516129
5          Championship       0.516129
4            Bundesliga       0.483871
30            Superliga       0.451613
22              Serie B       0.451613
6           Ekstraklasa       0.419355
0         2. Bundesliga       0.387097
3           Allsvenskan       0.387097
19          Superligaen       0.354839
21  Professional League       0.354839
24     Liga MX Clausura       0.354839
17          Eliteserien       0.322581
20            J1 League       0.322581
13          Premiership       0.322581
1               3. Liga       0.258065
25           K League 1       0.258065
2              A-League       0.258065
31       Liga DIMAYOR I       0.225806
28           League One       0.193548
10              Ligue 2       0.161290
29           League Two       0.161290
outgoing degree centrality
                   club  out_centrality
15         Super League        0.709677
4            Bundesliga        0.709677
7            Eredivisie        0.709677
12       Premier League        0.709677
5          Championship        0.612903
18             Liga NOS        0.612903
14              Serie A        0.612903
9               Ligue 1        0.580645
0         2. Bundesliga        0.548387
27              Série A        0.548387
26               LaLiga        0.548387
19          Superligaen        0.548387
30            Superliga        0.516129
8    Jupiler Pro League        0.516129
17          Eliteserien        0.516129
16            Süper Lig        0.483871
11                  MLS        0.451613
10              Ligue 2        0.451613
20            J1 League        0.451613
3           Allsvenskan        0.419355
23              LaLiga2        0.387097
28           League One        0.322581
6           Ekstraklasa        0.290323
1               3. Liga        0.258065
31       Liga DIMAYOR I        0.258065
13          Premiership        0.225806
21  Professional League        0.225806
22              Serie B        0.225806
2              A-League        0.193548
24     Liga MX Clausura        0.161290
29           League Two        0.161290
25           K League 1        0.129032
Profit made by teams
                   club        profit
4            Bundesliga  7.040243e+09
27              Série A  1.964983e+09
14              Serie A  6.275560e+08
15         Super League  2.655510e+08
8    Jupiler Pro League  2.378400e+08
23              LaLiga2  2.202850e+08
18             Liga NOS  1.829650e+08
7            Eredivisie  1.424500e+08
19          Superligaen  1.091230e+08
13          Premiership  1.084750e+08
5          Championship  1.032510e+08
0         2. Bundesliga  8.768800e+07
30            Superliga  7.539800e+07
16            Süper Lig  6.270700e+07
24     Liga MX Clausura  5.711100e+07
6           Ekstraklasa  5.561000e+07
17          Eliteserien  4.112500e+07
3           Allsvenskan  2.830000e+07
10              Ligue 2  2.755000e+07
22              Serie B  2.713000e+07
29           League Two  2.132900e+07
2              A-League  1.346500e+07
25           K League 1  6.582000e+06
20            J1 League  2.160000e+06
21  Professional League -5.780000e+06
1               3. Liga -2.410500e+07
11                  MLS -3.691800e+07
31       Liga DIMAYOR I -4.015400e+07
12       Premier League -1.336250e+08
9               Ligue 1 -1.615100e+08
28           League One -3.325170e+08
26               LaLiga -5.160900e+08

The central member in almost all measures is Chinese super league, as most of the clubs there got richer in a winter period and made a lot of expensive transfers in the winter transfer window.

Network of teams inside one league

In [45]:
league_subgraph = get_subgraph(leagues_G, "country", "England")
In [46]:
network_stats(league_subgraph)
Network's density
0.8975044563279857
Network's reciprocity
0.7944389275074478
Network's assortavity based on continent
nan
Network's assortavity based on league_class
0.10626710325491319
Network's assortavity based on country
nan
Network's assortavity based on degrees
0.09044638445219912
In [47]:
network_info(league_subgraph)
outgoing edges
               club  outgoing_count
6           Chelsea              51
29           Wolves              47
7         Liverpool              47
26      Aston Villa              46
21          Man Utd              42
27        Blackburn              41
24            Derby              40
20        Leicester              39
23           Fulham              39
1          Brighton              38
8           Burnley              37
31        Newcastle              37
3               QPR              37
16            Leeds              36
17        Blackpool              36
5        Stoke City              33
10      Bournemouth              31
19       Sunderland              29
22          Rangers              28
12      Southampton              28
2           Watford              27
28     Peterborough              27
25  Scunthorpe Utd.              26
33         Man City              25
14        Rotherham              25
9        Shrewsbury              23
4          Rochdale              22
32        Fleetwood              22
11        Stevenage              14
0    Bristol Rovers              14
13      Yeovil Town              10
15  Colchester Utd.               9
30     Man City U23               1
18     Sligo Rovers               0
incoming edges
               club  incoming_count
7         Liverpool              48
21          Man Utd              45
26      Aston Villa              44
3               QPR              44
6           Chelsea              44
1          Brighton              40
24            Derby              39
23           Fulham              39
29           Wolves              39
20        Leicester              38
27        Blackburn              38
16            Leeds              37
8           Burnley              37
10      Bournemouth              37
5        Stoke City              34
19       Sunderland              33
31        Newcastle              32
14        Rotherham              30
22          Rangers              30
17        Blackpool              28
9        Shrewsbury              27
12      Southampton              26
25  Scunthorpe Utd.              26
4          Rochdale              25
2           Watford              25
28     Peterborough              23
32        Fleetwood              23
33         Man City              22
0    Bristol Rovers              17
15  Colchester Utd.              15
11        Stevenage              12
13      Yeovil Town               8
18     Sligo Rovers               1
30     Man City U23               1
degree centrality
               club  centrality
6           Chelsea    2.878788
7         Liverpool    2.878788
26      Aston Villa    2.727273
21          Man Utd    2.636364
29           Wolves    2.606061
3               QPR    2.454545
24            Derby    2.393939
27        Blackburn    2.393939
23           Fulham    2.363636
1          Brighton    2.363636
20        Leicester    2.333333
8           Burnley    2.242424
16            Leeds    2.212121
31        Newcastle    2.090909
10      Bournemouth    2.060606
5        Stoke City    2.030303
17        Blackpool    1.939394
19       Sunderland    1.878788
22          Rangers    1.757576
14        Rotherham    1.666667
12      Southampton    1.636364
25  Scunthorpe Utd.    1.575758
2           Watford    1.575758
28     Peterborough    1.515152
9        Shrewsbury    1.515152
33         Man City    1.424242
4          Rochdale    1.424242
32        Fleetwood    1.363636
0    Bristol Rovers    0.939394
11        Stevenage    0.787879
15  Colchester Utd.    0.727273
13      Yeovil Town    0.545455
30     Man City U23    0.060606
18     Sligo Rovers    0.030303
incoming degree centrality
               club  in_centrality
7         Liverpool       1.454545
21          Man Utd       1.363636
26      Aston Villa       1.333333
3               QPR       1.333333
6           Chelsea       1.333333
1          Brighton       1.212121
24            Derby       1.181818
23           Fulham       1.181818
29           Wolves       1.181818
20        Leicester       1.151515
27        Blackburn       1.151515
16            Leeds       1.121212
8           Burnley       1.121212
10      Bournemouth       1.121212
5        Stoke City       1.030303
19       Sunderland       1.000000
31        Newcastle       0.969697
14        Rotherham       0.909091
22          Rangers       0.909091
17        Blackpool       0.848485
9        Shrewsbury       0.818182
12      Southampton       0.787879
25  Scunthorpe Utd.       0.787879
4          Rochdale       0.757576
2           Watford       0.757576
28     Peterborough       0.696970
32        Fleetwood       0.696970
33         Man City       0.666667
0    Bristol Rovers       0.515152
15  Colchester Utd.       0.454545
11        Stevenage       0.363636
13      Yeovil Town       0.242424
18     Sligo Rovers       0.030303
30     Man City U23       0.030303
outgoing degree centrality
               club  out_centrality
6           Chelsea        1.545455
29           Wolves        1.424242
7         Liverpool        1.424242
26      Aston Villa        1.393939
21          Man Utd        1.272727
27        Blackburn        1.242424
24            Derby        1.212121
20        Leicester        1.181818
23           Fulham        1.181818
1          Brighton        1.151515
8           Burnley        1.121212
31        Newcastle        1.121212
3               QPR        1.121212
16            Leeds        1.090909
17        Blackpool        1.090909
5        Stoke City        1.000000
10      Bournemouth        0.939394
19       Sunderland        0.878788
22          Rangers        0.848485
12      Southampton        0.848485
2           Watford        0.818182
28     Peterborough        0.818182
25  Scunthorpe Utd.        0.787879
33         Man City        0.757576
14        Rotherham        0.757576
9        Shrewsbury        0.696970
4          Rochdale        0.666667
32        Fleetwood        0.666667
11        Stevenage        0.424242
0    Bristol Rovers        0.424242
13      Yeovil Town        0.303030
15  Colchester Utd.        0.272727
30     Man City U23        0.030303
18     Sligo Rovers        0.000000
Profit made by teams
               club        profit
27        Blackburn  4.206800e+08
28     Peterborough  2.200550e+08
17        Blackpool  1.701650e+08
16            Leeds  1.537400e+08
14        Rotherham  7.207500e+07
30     Man City U23  6.940000e+07
25  Scunthorpe Utd.  3.628000e+07
9        Shrewsbury  2.136000e+07
15  Colchester Utd.  1.308500e+07
4          Rochdale  1.136500e+07
11        Stevenage  5.170000e+06
0    Bristol Rovers  4.880000e+06
18     Sligo Rovers  6.500000e+04
13      Yeovil Town  0.000000e+00
32        Fleetwood -2.875000e+06
22          Rangers -1.904630e+08
24            Derby -3.069800e+08
3               QPR -4.688200e+08
8           Burnley -6.261150e+08
12      Southampton -9.693750e+08
19       Sunderland -1.454040e+09
5        Stoke City -1.612836e+09
20        Leicester -2.208885e+09
10      Bournemouth -2.372435e+09
23           Fulham -2.605770e+09
31        Newcastle -2.796261e+09
2           Watford -2.907025e+09
29           Wolves -2.959519e+09
26      Aston Villa -3.252285e+09
7         Liverpool -3.883465e+09
1          Brighton -3.950150e+09
6           Chelsea -7.302390e+09
21          Man Utd -8.154325e+09
33         Man City -1.592600e+10
In [48]:
#@title **Network by country**
country = "Italy" #@param ['Japan','Italy','Belgium','Spain','Mexico','Sweden','Saudi Arabia','Turkey','Brazil','Denmark','Netherlands','Portugal','France','Colombia','Germany','China','Norway','Argentina','Poland','England','Scotland','United States','Australia','Korea, South']
league_subgraph = get_subgraph(leagues_G, "country", country)
title = "Spanish league transfers network" #@param string
call_plot(league_subgraph, title, nx.circular_layout, node_color = "name")
Loading BokehJS ...

Node2Vec

In [49]:
!pip install node2vec
from node2vec import Node2Vec
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import plotly.express as px
Requirement already satisfied: node2vec in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (0.3.2)
Requirement already satisfied: tqdm in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from node2vec) (4.36.1)
Requirement already satisfied: gensim in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from node2vec) (3.8.2)
Requirement already satisfied: numpy in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from node2vec) (1.17.2)
Requirement already satisfied: joblib>=0.13.2 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from node2vec) (0.13.2)
Requirement already satisfied: networkx in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from node2vec) (2.3)
Requirement already satisfied: six>=1.5.0 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from gensim->node2vec) (1.12.0)
Requirement already satisfied: smart-open>=1.8.1 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from gensim->node2vec) (1.11.1)
Requirement already satisfied: scipy>=1.0.0 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from gensim->node2vec) (1.3.1)
Requirement already satisfied: decorator>=4.3.0 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from networkx->node2vec) (4.4.0)
Requirement already satisfied: boto in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from smart-open>=1.8.1->gensim->node2vec) (2.49.0)
Requirement already satisfied: requests in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from smart-open>=1.8.1->gensim->node2vec) (2.22.0)
Requirement already satisfied: boto3 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from smart-open>=1.8.1->gensim->node2vec) (1.12.46)
Requirement already satisfied: certifi>=2017.4.17 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from requests->smart-open>=1.8.1->gensim->node2vec) (2019.9.11)
Requirement already satisfied: idna<2.9,>=2.5 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from requests->smart-open>=1.8.1->gensim->node2vec) (2.8)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from requests->smart-open>=1.8.1->gensim->node2vec) (3.0.4)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from requests->smart-open>=1.8.1->gensim->node2vec) (1.24.2)
Requirement already satisfied: botocore<1.16.0,>=1.15.46 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from boto3->smart-open>=1.8.1->gensim->node2vec) (1.15.46)
Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from boto3->smart-open>=1.8.1->gensim->node2vec) (0.9.5)
Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from boto3->smart-open>=1.8.1->gensim->node2vec) (0.3.3)
Requirement already satisfied: docutils<0.16,>=0.10 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from botocore<1.16.0,>=1.15.46->boto3->smart-open>=1.8.1->gensim->node2vec) (0.15.2)
Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /Users/vineti_macbook/anaconda3/lib/python3.7/site-packages (from botocore<1.16.0,>=1.15.46->boto3->smart-open>=1.8.1->gensim->node2vec) (2.8.0)
unable to import 'smart_open.gcs', disabling that module
In [50]:
model = Node2Vec(transfers_G, dimensions=16, p=1, q=2, num_walks=200, walk_length=6) 
Computing transition probabilities: 100%|██████████| 321/321 [00:00<00:00, 488.39it/s]
Generating walks (CPU: 1): 100%|██████████| 200/200 [00:21<00:00,  9.31it/s]
In [52]:
embeddings = model.fit()
In [53]:
def plot_sim_nodes(embeddings, nodes):
  arrays = np.empty((0, 16), dtype='f')
  nodes_array = np.array([])
  sim_nodes_array = np.array([])
  scores_array = np.array([])
  for node in nodes:
    sim_nodes = embeddings.most_similar(node)
    for sim_node in sim_nodes:
      close_node = list(sim_node)[0]
      arrays = np.append(arrays, embeddings[close_node].reshape(1,16), axis=0)
      nodes_array = np.append(nodes_array, [node])
      sim_nodes_array = np.append(sim_nodes_array, [close_node])
      scores_array = np.append(scores_array, list(sim_node)[1])
  Y = PCA(n_components=2).fit_transform(arrays)
  df = pd.DataFrame({'x': [x for x in Y[:, 0]],
                      'y': [y for y in Y[:, 1]],
                      'sim_node': sim_nodes_array,
                      'node': nodes_array,
                      'score' : scores_array
                     })
  fig = px.scatter(df, "x", "y", hover_data=['sim_node',"node", "score"],
                   color = 'node', text = "sim_node", labels = {"x":"", "y":""},
                   title = "Similar teams based on the network",)
  fig.update_traces(textposition='top center')
  fig.show()
In [54]:
plot_sim_nodes(embeddings, ['FC Porto','Benfica'])